# -*- coding:utf-8 -*-
# @Time : 2021/2/23 10:53
# @Author : Administrator
# @File : douban_moive.py
# @Software: PyCharm
# @Motto: good good study,day day up

import requests
from parsel import Selector
import pymongo
from loguru import logger

# MongoDB handles used by the scraper: a client for the local server, the
# "spiders" database, and the "movie" collection that receives the records.
myclient = pymongo.MongoClient(host="mongodb://localhost:27017/")
mydb = myclient.get_database("spiders")
mycol = mydb.get_collection("movie")

# The request headers are the same for every page, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}

# Seconds to wait for the server; without a timeout requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10


def _parse_movie(movie):
    """Build the record to store from one ``<li>`` entry of the ranking list.

    Args:
        movie: parsel selector positioned on a single list item.

    Returns:
        A dict with the keys ``title``, ``star``, ``movie_time``,
        ``movie_country``, ``movie_type``, ``rating_num``, ``people`` and
        ``inq``, or ``None`` when the entry lacks the text nodes or the
        three ``\\xa0/\\xa0``-separated fields this parser relies on.
    """
    title = movie.xpath('.//span[@class="title"]//text()').extract_first()
    info_lines = movie.xpath('.//div[@class="bd"]//p//text()').extract()
    stars = movie.xpath('.//div[@class="star"]//span//text()').extract()
    if len(info_lines) < 2 or len(stars) < 2:
        return None

    # First text node of the info paragraph, with padding and the trailing
    # "/..." truncation marker removed.
    star = info_lines[0].strip().replace('\xa0\xa0\xa0', '').replace('/...', '')

    # Second text node holds three fields separated by "\xa0/\xa0":
    # release time, country, genre.
    movie_info = info_lines[1].strip().split('\xa0/\xa0')
    if len(movie_info) < 3:
        return None
    movie_time, movie_country, movie_type = movie_info[:3]

    # The one-line quote is optional; extract_first() yields None if absent.
    inq = movie.xpath('.//p[@class="quote"]//span[@class="inq"]//text()').extract_first()

    return {
        'title': title,
        'star': star,
        'movie_time': movie_time + '年',
        'movie_country': movie_country,
        'movie_type': movie_type,
        'rating_num': stars[0] + '分',
        'people': stars[1],
        'inq': inq,
    }


# The ranking is paginated 25 entries per page via the ``start`` offset.
for page in range(0, 250, 25):
    url = f'https://movie.douban.com/top250?start={page}&filter='
    logger.info(f'正在获取{url}网页的数据.......')
    try:
        response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Do not parse error pages (e.g. 403/5xx) as if they were results.
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.error('Failed to fetch {}: {}', url, exc)
        continue

    selector = Selector(response.text)
    for movie in selector.xpath('//ol[@class="grid_view"]//li'):
        item = _parse_movie(movie)
        if item is None:
            # One malformed entry should not abort the whole crawl.
            logger.warning('Skipping an entry with unexpected layout on {}', url)
            continue
        logger.info('正在导入mongo数据库........')
        mycol.insert_one(item)
        logger.info('写入完成.....')